library(tidyverse)
library(lubridate)
library(dplyr)
library(leaflet)
library(plotly)
library(viridis)
theme_set(theme_minimal() + theme(legend.position = "bottom"))
options(
ggplot2.continuous.colour = "viridis",
ggplot2.continuous.fill = "viridis"
)
scale_colour_discrete = scale_color_viridis_d
scale_fill_discrete = scale_fill_viridis_d
city_files = list.files("30_cities_data")
onehundrd_city_files = list.files("100_cities_data")
Time period we are interested in
period_18 = interval(ymd("2018-02-01"), ymd("2018-04-30"))
period_19 = interval(ymd("2019-02-01"), ymd("2019-04-30"))
period_20 = interval(ymd("2020-02-01"), ymd("2020-04-30"))
period_21 = interval(ymd("2021-02-01"), ymd("2021-04-30"))
The daily mean PM2.5 AQI from Feb to Aprl of year 2019 and year 2020 in each city.
city_period_meanPM25 =
tibble(city = character(),
mean_19 = numeric(),
mean_20 = numeric(),
mean_diff = numeric())
for (city_file in city_files) {
#print(city_file)
path = str_c("30_cities_data/", city_file)
city = strsplit(city_file, split = '-')[[1]][1]
cityAir = read_csv(path) %>%
mutate(date = as.Date(date, "%Y/%m/%d")) %>%
arrange(date)
cityAir_19 = cityAir %>%
filter(date %within% period_19)
cityAir_20 = cityAir %>%
filter(date %within% period_20)
mean_19 = mean(cityAir_19$pm25, na.rm = T)
mean_20 = mean(cityAir_20$pm25, na.rm = T)
mean_diff = mean_20 - mean_19
city_period_meanPM25 =
city_period_meanPM25 %>%
add_row(city = city,
mean_19 = mean_19,
mean_20 = mean_20,
mean_diff = mean_diff)
}
city_period_meanPM25 =
city_period_meanPM25 %>%
mutate(
city = paste(
toupper(substring(city, 1, 1)),
substring(city, 2),
sep = ""),
city = fct_reorder(city, mean_diff, .desc = T))
city_period_meanPM25 %>%
arrange(mean_diff) %>%
plot_ly(x = ~mean_diff,
y = ~city,
type = "bar",
color = ~city,
colors = viridis_pal(option = "D")(3)) %>%
layout(title = "Feb-Aprl Daily mean PM2.5 AQI Difference, 2020 minus 2019",
xaxis = list(title = "PM25 AQI Difference"),
yaxis = list(title = "City"))
Now we will see how the distribution of daily AQI differ between time period 2019 Feb-Aprl and 2020 Feb-Aprl.
city_PM25_Distribution = tibble()
for (city_file in city_files) {
path = str_c("30_cities_data/", city_file)
city = strsplit(city_file, split = '-')[[1]][1]
cityAir = read_csv(path) %>%
mutate(date = as.Date(date, "%Y/%m/%d")) %>%
arrange(date)
city_19 = cityAir %>%
filter(date %within% period_19) %>%
mutate(period = "2019Feb-Aprl",
day = format(date,"%m-%d"),
city = city) %>%
relocate(city, period, day)
#add a fake date "2019-02-29" with all AQI values as NA
city_19 =
city_19 %>%
add_row(city = city,
period = "2019Feb-Aprl",
day = "02-29") %>%
mutate(day = as.factor(day))
city_20 = cityAir %>%
filter(date %within% period_20) %>%
mutate(period = "2020Feb-Aprl",
day = format(date,"%m-%d"),
day = as.factor(day),
city = city) %>%
relocate(city, period, day)
city_PM25_Distribution = rbind(city_PM25_Distribution, city_19)
city_PM25_Distribution = rbind(city_PM25_Distribution, city_20)
}
city_PM25_Distribution =
city_PM25_Distribution %>%
mutate(period = factor(period, levels = c("2020Feb-Aprl", "2019Feb-Aprl")),
city = paste(
toupper(substring(city, 1, 1)),
substring(city, 2),
sep = ""))
#This way is stupid but it works! I tried other methods but they just don't work as expected!!!
city_PM25_Distribution %>%
plot_ly(
y = ~city, x = ~pm25, color = ~period, type = "box",
colors = c(rgb(0.2, 0.6, 0.8, 0.6), rgb(0.8, 0.2, 0.2, 0.6))) %>%
add_trace(
y = ~city, x = ~pm10, color = ~period, type = "box",
colors = c(rgb(0.2, 0.6, 0.8, 0.6), rgb(0.8, 0.2, 0.2, 0.6)), visible = FALSE) %>%
add_trace(
y = ~city, x = ~o3, color = ~period, type = "box",
colors = c(rgb(0.2, 0.6, 0.8, 0.6), rgb(0.8, 0.2, 0.2, 0.6)), visible = FALSE) %>%
add_trace(
y = ~city, x = ~no2, color = ~period, type = "box",
colors = c(rgb(0.2, 0.6, 0.8, 0.6), rgb(0.8, 0.2, 0.2, 0.6)), visible = FALSE) %>%
add_trace(
y = ~city, x = ~so2, color = ~period, type = "box",
colors = c(rgb(0.2, 0.6, 0.8, 0.6), rgb(0.8, 0.2, 0.2, 0.6)), visible = FALSE) %>%
add_trace(
y = ~city, x = ~co, color = ~period, type = "box",
colors = c(rgb(0.2, 0.6, 0.8, 0.6), rgb(0.8, 0.2, 0.2, 0.6)), visible = FALSE) %>%
layout(title = "PM 2.5",
xaxis = list(title = "Daily AQI"),
boxmode = "group",
updatemenus = list(
list(
y = 0.8,
buttons = list(
list(label = "PM25",
method = "update",
args = list(list(visible = c(T,T, F,F, F,F, F,F, F,F, F,F)),
list(title = "PM 2.5"))),
list(label = "PM10",
method = "update",
args = list(list(visible = c(F,F, T,T, F,F, F,F, F,F, F,F)),
list(title = "PM 10"))),
list(label = "O3",
method = "update",
args = list(list(visible = c(F,F, F,F, T,T, F,F, F,F, F,F)),
list(title = "O3"))),
list(label = "no2",
method = "update",
args = list(list(visible = c(F,F, F,F, F,F, T,T, F,F, F,F)),
list(title = "NO2"))),
list(label = "so2",
method = "update",
args = list(list(visible = c(F,F, F,F, F,F, F,F, T,T, F,F)),
list(title = "SO2"))),
list(label = "co",
method = "update",
args = list(list(visible = c(F,F, F,F, F,F, F,F, F,F, T,T)),
list(title = "CO")))
))
))
## Warning: Ignoring 60 observations
## Warning: Ignoring 88 observations
## Warning: Ignoring 433 observations
## Warning: Ignoring 60 observations
## Warning: Ignoring 120 observations
## Warning: Ignoring 330 observations
## Warning: 'layout' objects don't have these attributes: 'boxmode'
## Valid attributes include:
## '_deprecated', 'activeshape', 'annotations', 'autosize', 'autotypenumbers', 'calendar', 'clickmode', 'coloraxis', 'colorscale', 'colorway', 'computed', 'datarevision', 'dragmode', 'editrevision', 'editType', 'font', 'geo', 'grid', 'height', 'hidesources', 'hoverdistance', 'hoverlabel', 'hovermode', 'images', 'legend', 'mapbox', 'margin', 'meta', 'metasrc', 'modebar', 'newshape', 'paper_bgcolor', 'plot_bgcolor', 'polar', 'scene', 'selectdirection', 'selectionrevision', 'separators', 'shapes', 'showlegend', 'sliders', 'spikedistance', 'template', 'ternary', 'title', 'transition', 'uirevision', 'uniformtext', 'updatemenus', 'width', 'xaxis', 'yaxis', 'barmode', 'bargap', 'mapType'